//
// Project: Disagreement in science: Missing women



// start from an empty session and pin the interpreter version
clear all
version 15.1



//
// set locals

// names of the variables that flag female / male authors
local female female_genderize
local male male_genderize

// expression that is true when the author's gender is known
// (built from the `female' local; expands to female_genderize!=.)
local known_gender "`female'!=."




//
// AER

// call data
// load the AER data and restrict the sample
use "${data}/output/aer_data_gender.dta", clear
drop if month=="May" & year!=2019  // exclude AEA papers and proceedings
drop if year==2020
keep if (comment | research_article)

// keep only rows with author_id equal to 1
// NOTE(review): presumably this leaves one row per article -- confirm against the data
keep if author_id==1

// build a Stata date for every row; the day is fixed at 1,
// the month name in `month' is mapped to its number 1-12
generate day_numeric = 1
generate month_numeric = .
local month_names January February March April May June July August September October November December
forvalues m = 1/12 {
	local name : word `m' of `month_names'
	replace month_numeric = `m' if month == "`name'"
}
generate date = mdy(month_numeric, day_numeric, year)

// for every row that points to another article through call_to,
// store the mean date of the rows whose article_id equals that value
generate date_original_article = .
forvalues row = 1/`=_N' {
	if call_to[`row'] != . {
		local target = call_to[`row']
		summarize date if article_id == `target'
		replace date_original_article = r(mean) in `row'
	}
}

// for comments addressing articles published before 1999, set date of original
// article to midyear (1 January + 182 days) of the recorded year of the original article;
// this applies to every row where year_call_to is not missing
tostring year_call_to, replace
replace date_original_article = date(year_call_to, "Y") + 182 if year_call_to != "."

// time elapsed, in days, between original article and comment
generate delay = date - date_original_article
summarize delay, detail

// cumulative distribution of the delay among comments;
// x-axis ticks are placed every 365.25 days and labelled in years
keep if comment
sort delay
cumul delay if comment==1, generate(cumul_delay)
line cumul_delay delay, sort ///
	plotregion(style(none)) scheme(s1color) title("") ///
	xtitle("Years elapsed between publication of original paper and comment") ///
	ytitle("Cumulative distribution") ///
	xlabel(0 "0" 365.25 "1" 730.5 "2" 1095.75 "3" 1461 "4" 1826.25 "5" ///
		2191.5 "6" 2556.75 "7" 2922 "8" 3287.25 "9" 3652.5 "10" 4017.75 "11" ///
		4383 "12" 4748.25 "13" 5113.5 "14" 5478.75 "15" 5844 "16" 6209.25 "17" ///
		6574.5 "18" 6939.75 "19" 7305 "20" 7670.25 "21" 8035.5 "22", ///
		noticks labsize(medsmall)) ///
	lcolor(gs3%80) lwidth(thick) ///
	ylabel(0 .2 `"0.2"' .4 `"0.4"' .6 `"0.6"' .8 `"0.8"' 1 `"1"', ///
		grid angle(horizontal) labsize(medsmall)) ///
	yscale(r(., 1))
graph export "${output}/aer_time_between.eps", replace
graph export "${output}/aer_time_between.png", replace



//
// ASR

// call data
// load the ASR data and restrict the sample to comments and research articles
use "${data}/output/asr_data_gender.dta", clear
keep if (comment | research_article)

// keep only rows with author_id equal to 1
// NOTE(review): presumably this leaves one row per article -- confirm against the data
keep if author_id==1

// build a Stata date for every row; the day is fixed at 1,
// the month name in `month' is mapped to its number 1-12
generate day_numeric = 1
generate month_numeric = .
local month_names January February March April May June July August September October November December
forvalues m = 1/12 {
	local name : word `m' of `month_names'
	replace month_numeric = `m' if month == "`name'"
}
generate date = mdy(month_numeric, day_numeric, year)

// for every row that points to another article through call_to,
// store the mean date of the rows whose article_id equals that value
generate date_original_article = .
forvalues row = 1/`=_N' {
	if call_to[`row'] != . {
		local target = call_to[`row']
		summarize date if article_id == `target'
		replace date_original_article = r(mean) in `row'
	}
}

// for comments addressing articles published before start of dataset, use date in
// year_call_to and month_call_to for original article; the two are concatenated
// (month first) and parsed with the "MY" mask wherever year_call_to is not missing
// NOTE(review): assumes month_call_to holds a month name that date() can parse -- confirm
tostring year_call_to month_call_to, replace
generate month_year_call_to = month_call_to + year_call_to
replace date_original_article = date(month_year_call_to, "MY") if year_call_to != "."

// time elapsed, in days, between original article and comment
generate delay = date - date_original_article
summarize delay, detail

// cumulative distribution of the delay among comments;
// x-axis ticks are placed every 365.25 days and labelled in years
keep if comment
sort delay
cumul delay if comment==1, generate(cumul_delay)
line cumul_delay delay, sort ///
	plotregion(style(none)) scheme(s1color) title("") ///
	xtitle("Years elapsed between publication of original paper and comment") ///
	ytitle("Cumulative distribution") ///
	xlabel(0 "0" 365.25 "1" 730.5 "2" 1095.75 "3" 1461 "4" 1826.25 "5" ///
		2191.5 "6" 2556.75 "7" 2922 "8" 3287.25 "9", ///
		noticks labsize(medsmall)) ///
	lcolor(gs3%80) lwidth(thick) ///
	ylabel(0 .2 `"0.2"' .4 `"0.4"' .6 `"0.6"' .8 `"0.8"' 1 `"1"', ///
		grid angle(horizontal) labsize(medsmall)) ///
	yscale(r(., 1))
graph export "${output}/asr_time_between.eps", replace
graph export "${output}/asr_time_between.png", replace



//
// Nature

// call data
// load the Nature data and restrict the sample
use "${data}/output/nature_data_gender.dta", clear
drop if year==2020
keep if comment | research_article

// keep only rows with author_id equal to 1
// NOTE(review): presumably this leaves one row per article -- confirm against the data
keep if author_id==1

// build a Stata date for every row from the dataset's own day variable;
// the month name in `month' is mapped to its number 1-12
generate month_numeric = .
local month_names January February March April May June July August September October November December
forvalues m = 1/12 {
	local name : word `m' of `month_names'
	replace month_numeric = `m' if month == "`name'"
}
generate date = mdy(month_numeric, day, year)

// for every row that points to another article through call_to,
// store the mean date of the rows whose article_id equals that value
generate date_original_article = .
forvalues row = 1/`=_N' {
	if call_to[`row'] != . {
		local target = call_to[`row']
		summarize date if article_id == `target'
		replace date_original_article = r(mean) in `row'
	}
}

// for comments addressing articles published before start of dataset, use date in
// year_call_to and month_call_to for original article; the two are concatenated
// (month first) and parsed with the "MY" mask wherever year_call_to is not missing
// NOTE(review): assumes month_call_to holds a month name that date() can parse -- confirm
tostring year_call_to month_call_to, replace
generate month_year_call_to = month_call_to + year_call_to
replace date_original_article = date(month_year_call_to, "MY") if year_call_to != "."

// time elapsed, in days, between original article and comment
generate delay = date - date_original_article
summarize delay, detail

// cumulative distribution of the delay among comments;
// x-axis ticks are placed every 365.25 days and labelled in years
keep if comment
sort delay
cumul delay if comment==1, generate(cumul_delay)
line cumul_delay delay, sort ///
	plotregion(style(none)) scheme(s1color) title("") ///
	xtitle("Years elapsed between publication of original paper and comment") ///
	ytitle("Cumulative distribution") ///
	xlabel(0 "0" 365.25 "1" 730.5 "2" 1095.75 "3" 1461 "4" 1826.25 "5" ///
		2191.5 "6" 2556.75 "7" 2922 "8" 3287.25 "9" 3652.5 "10" 4017.75 "11", ///
		noticks labsize(medsmall)) ///
	lcolor(gs3%80) lwidth(thick) ///
	ylabel(0 .2 `"0.2"' .4 `"0.4"' .6 `"0.6"' .8 `"0.8"' 1 `"1"', ///
		grid angle(horizontal) labsize(medsmall)) ///
	yscale(r(., 1))
graph export "${output}/nature_time_between.eps", replace
graph export "${output}/nature_time_between.png", replace


